1 Overview of Healthcare Spending

1.1 Healthcare spending over the years

#Current health expenditure (% of GDP) (Source: https://data.worldbank.org/indicator/SH.XPD.CHEX.GD.ZS)
wb_data <- read_csv("data/healthcare-spending/world_bank_data.csv") %>%
  clean_names()

skimr::skim(wb_data)
Data summary
Name wb_data
Number of rows 266
Number of columns 69
_______________________
Column type frequency:
character 7
logical 42
numeric 20
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
country_code 0 1.00 3 3 0 266 0
country_name 0 1.00 4 52 0 266 0
indicator_name 0 1.00 37 37 0 1 0
indicator_code 0 1.00 17 17 0 1 0
region 49 0.82 10 26 0 7 0
income_group 50 0.81 10 19 0 4 0
special_notes 140 0.47 16 1177 0 111 0

Variable type: logical

skim_variable n_missing complete_rate mean count
x1960 266 0 NaN :
x1961 266 0 NaN :
x1962 266 0 NaN :
x1963 266 0 NaN :
x1964 266 0 NaN :
x1965 266 0 NaN :
x1966 266 0 NaN :
x1967 266 0 NaN :
x1968 266 0 NaN :
x1969 266 0 NaN :
x1970 266 0 NaN :
x1971 266 0 NaN :
x1972 266 0 NaN :
x1973 266 0 NaN :
x1974 266 0 NaN :
x1975 266 0 NaN :
x1976 266 0 NaN :
x1977 266 0 NaN :
x1978 266 0 NaN :
x1979 266 0 NaN :
x1980 266 0 NaN :
x1981 266 0 NaN :
x1982 266 0 NaN :
x1983 266 0 NaN :
x1984 266 0 NaN :
x1985 266 0 NaN :
x1986 266 0 NaN :
x1987 266 0 NaN :
x1988 266 0 NaN :
x1989 266 0 NaN :
x1990 266 0 NaN :
x1991 266 0 NaN :
x1992 266 0 NaN :
x1993 266 0 NaN :
x1994 266 0 NaN :
x1995 266 0 NaN :
x1996 266 0 NaN :
x1997 266 0 NaN :
x1998 266 0 NaN :
x1999 266 0 NaN :
x2020 266 0 NaN :
x2021 266 0 NaN :

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
x2000 34 0.87 5.60 2.73 1.51 3.97 5.04 6.79 24.24 ▇▃▁▁▁
x2001 34 0.87 5.72 2.52 1.76 4.06 5.21 6.99 19.19 ▇▆▁▁▁
x2002 33 0.88 5.81 2.41 1.70 4.17 5.15 7.13 14.38 ▅▇▃▁▁
x2003 31 0.88 5.90 2.41 1.92 4.19 5.36 7.18 15.42 ▆▇▃▁▁
x2004 31 0.88 5.95 2.46 1.66 4.29 5.34 7.41 17.26 ▇▇▃▁▁
x2005 31 0.88 5.99 2.71 1.50 4.12 5.28 7.65 21.96 ▇▆▁▁▁
x2006 31 0.88 5.95 2.65 1.52 4.18 5.29 7.72 22.02 ▇▆▁▁▁
x2007 31 0.88 5.96 2.80 1.57 4.12 5.29 7.57 24.23 ▇▅▁▁▁
x2008 31 0.88 6.01 2.85 1.64 4.00 5.30 7.72 23.00 ▇▅▁▁▁
x2009 31 0.88 6.38 2.78 1.84 4.34 5.57 8.06 16.43 ▆▇▅▁▁
x2010 30 0.89 6.27 2.79 1.82 4.25 5.47 8.06 16.88 ▆▇▅▁▁
x2011 29 0.89 6.20 2.76 1.55 4.31 5.47 7.97 16.20 ▆▇▅▁▁
x2012 30 0.89 6.21 2.70 1.26 4.27 5.52 7.91 16.18 ▅▇▃▁▁
x2013 31 0.88 6.33 2.76 1.88 4.43 5.57 7.96 18.60 ▇▇▃▁▁
x2014 31 0.88 6.38 2.82 1.91 4.45 5.50 7.91 19.73 ▇▆▂▁▁
x2015 31 0.88 6.50 2.89 1.82 4.60 5.72 7.83 20.41 ▇▆▂▁▁
x2016 32 0.88 6.52 2.84 1.63 4.56 5.82 7.89 17.37 ▅▇▃▁▁
x2017 31 0.88 6.49 2.78 1.64 4.61 6.03 8.04 16.81 ▅▇▃▁▁
x2018 31 0.88 6.46 2.85 1.60 4.47 5.87 8.03 19.04 ▇▇▃▁▁
x2019 32 0.88 6.53 2.99 1.53 4.48 5.85 7.94 23.96 ▇▆▁▁▁
wb_data <- wb_data %>%
  remove_empty(which=c("rows", "cols"), quiet=FALSE)

# get time series data
wb2 <- wb_data %>%
  pivot_longer(cols=c("x2000", "x2001", "x2002", "x2003", "x2004", "x2005", "x2006", "x2007", "x2008", "x2009", "x2010", "x2011", "x2012", "x2013", "x2014", "x2015", "x2016", "x2017", "x2018", "x2019"),
               names_to="Year",
               values_to="Spending") %>% 
  mutate(Year = as.integer(substr(Year, 2, 5)))
head(wb2)
## # A tibble: 6 × 9
##   country_code country_name indic…¹ indic…² region incom…³ speci…⁴  Year Spend…⁵
##   <chr>        <chr>        <chr>   <chr>   <chr>  <chr>   <chr>   <int>   <dbl>
## 1 ABW          Aruba        Curren… SH.XPD… Latin… High i… <NA>     2000      NA
## 2 ABW          Aruba        Curren… SH.XPD… Latin… High i… <NA>     2001      NA
## 3 ABW          Aruba        Curren… SH.XPD… Latin… High i… <NA>     2002      NA
## 4 ABW          Aruba        Curren… SH.XPD… Latin… High i… <NA>     2003      NA
## 5 ABW          Aruba        Curren… SH.XPD… Latin… High i… <NA>     2004      NA
## 6 ABW          Aruba        Curren… SH.XPD… Latin… High i… <NA>     2005      NA
## # … with abbreviated variable names ¹​indicator_name, ²​indicator_code,
## #   ³​income_group, ⁴​special_notes, ⁵​Spending
eu <- c("AUT", "GBR", "BEL", "BGR", "CYP", "HRV", "CZE", "DNK", "EST", "FIN", "FRA", "DEU", "GRC", "HUN", "IRL", "ITA", "LVA", "LTU", "LUX", "MLT", "NLD", "POL", "PRT", "ROM", "SVK", "SVN", "ESP", "SWE")

wb2 %>%
  filter(country_code %in% eu) %>%
  mutate(is_gbr = ifelse(country_code=="GBR", 1, 0)) %>%
  group_by(is_gbr, Year) %>%
  summarize(spending_avg=mean(Spending, na.rm=TRUE)) %>%
  mutate(label = ifelse(Year != 2019, NA_character_, ifelse(is_gbr==1, "GBR", "EU"))) %>% 
  ggplot(aes(x=Year, y=spending_avg, color=as.factor(is_gbr))) + 
  geom_vline(
    xintercept = seq(1999, 2019, 1), color = "grey80", size = .4) + 
  geom_segment(
    data = tibble(y = seq(0, 11, 1), x1 = 1999, x2 = 2019),
    aes(x = x1, xend = x2, y = y, yend = y),
    inherit.aes = FALSE,
    color = "grey80",
    size = .4
  ) +
  geom_segment(
    data = tibble(y = 0, x1 = 1999, x2 = 2019),
    aes(x = x1, xend = x2, y = y, yend = y),
    inherit.aes = FALSE,
    color = "grey80",
    size = .4
  ) +
  geom_point() +
  geom_line() + 
  labs(title = "<b> UK spends relatively more on healthcare than the EU average</b><br>
       <span style = 'font-size:12pt'>Health Expenditure as % of GDP in <span style='color:#235eb8'> EU countries </span> and <span style='color:#b9bb5e'> the UK </span></span>",
       y = NULL, x = NULL, 
       caption = "Source: The World Bank (https://data.worldbank.org/indicator/SH.XPD.CHEX.GD.ZS)") + 
  theme_nhs() + 
  scale_x_continuous(breaks = seq(1999, 2019, 1), limits = c(1999, 2021), expand = c(0, 0)) + 
  scale_y_continuous(breaks = seq(0, 11, 1), labels = function(x) paste0(x, "%"), limits = c(0, 11)) + 
  theme(axis.text.x = element_text(angle=45, hjust = 1, vjust = 1), 
        axis.ticks = element_line(color = "grey80",),
        plot.title.position = "plot",
        plot.title = ggtext::element_textbox_simple(size=16)) + 
  geom_text_repel(
    aes(color = as.factor(is_gbr), label = label),
    xlim = c(2019, NA),
    family = "Helvetica",
    fontface = "bold",
    size = 4, 
    segment.size = .7,
    segment.alpha = .5,
    segment.linetype = "dotted"
  ) + 
  scale_color_manual(values = c(palette[1], palette[7]))

1.2 Breaking down the cost

dataf <- data.frame(
  Year=c("2007/8","2008/9","2009/10","2010/11","2011/12", "2012/13", "2013/14", "2014/15", "2015/16", "2016/17", "2017/18", "2018/19", "2019/20", "2020/21", "2021/22") ,  
  Funding=c(111.4, 116.6 ,124.1 , 124.5, 125.7, 126, 128.5, 131.2, 134.9, 135.7, 138.4, 141.4, 148.9, 191.01, 190.3))

p = ggplot(data= dataf, aes(x=Year, y=Funding, group = 1)) +
    geom_line(linetype= "dashed", color="blue", size=0.5)+
    geom_point(color="red", size=2) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
 
rect <- data.frame(xmin="2019/20", xmax= "2021/22", ymin=148.9, ymax=Inf)

p + geom_rect(data=rect, aes(xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax),
              color="grey20",
              alpha=0.1,
              inherit.aes = FALSE) +
  labs(
   x = "Year",
    y = "Funding (£bn)",
    title = "COVID-19 caused sharp increases in NHS spending",
    subtitle = "NHS Funding by year",
    caption = "Source:The King's Fund analysis of HM Treasury data"
  ) +

annotate("text", x = "2020/21", y = 153, label = "italic(COVID-19)",size = 2.2,
  parse = TRUE) + 
  theme_nhs() + 
  theme(
    panel.grid.major = element_line(color = "grey80"),  
    panel.grid.minor = element_line(color = "grey80")
  )

datag = data.frame(x = c("Social Protection", "Health", "General Public Services", "Education"), y = c(299, 217, 109, 100))


p <- ggplot(datag, aes(x=x, y=y)) +
  geom_segment(
    aes(x=x, xend=x, y=0, yend=y),
    color=ifelse(datag$x %in% c("Health"), "orange", "grey20"),
    size=ifelse(datag$x %in% c("Health"), 1.3, 0.7)
  ) +
  geom_point(
    color=ifelse(datag$x %in% c("Health"), "orange", "grey20"),
    size=ifelse(datag$x %in% c("Health"), 5, 2)) +
  labs(
    x = "Sector",
    y = "Funding (£bn)",
    title = "Health is the second highest expenditure",
    subtitle = "Government Expenditure by Sector 2021/22",
    caption= "Source:Public spending: a brief introduction-Philip Brien(07 October, 2022)"
  ) +
  coord_flip() + 
  theme_nhs() + 
  theme(
    panel.grid.major = element_line(color = "grey80"),  
    panel.grid.minor = element_line(color = "grey80")
  )
p

#https://digital.nhs.uk/data-and-information/publications/statistical/acute-patient-level-activity-and-costing/2019-20#resources
cost <- read.csv("data/healthcare-spending/Acute Patient Level Activity and Costing 2019-20 CSV data.csv") %>%
  janitor::clean_names()

cost1 <- cost %>%
  filter(summary_type=="Total Cost",
         org_name=="ALL SUBMITTERS",
         breakdown!="Total Cost",
         breakdown_group_2!="Unknown")

kid <- c("0 years", "01-04 years", "05-09 years", "10-14 years")
young_adult <- c("15-19 years", "20-24 years", "25-29 years")
adult <- c("30-34 years", "35-39 years", "40-44 years", "45-49 years")
elderly <- c("50-54 years", "55-59 years", "60-64 years", "65-69 years")
old <- c("70-74 years", "75-79 years", "80-84 years", "85-89 years", "90-94 years", "95 years or older")

cost2 <- cost1 %>%
  filter(breakdown_group_1 %in% c("Female", "Male")) %>%
  summarize(age=ifelse(breakdown_group_2 %in% kid, "0-14", ifelse(breakdown_group_2 %in% young_adult, "15-29", ifelse(breakdown_group_2 %in% adult, "30-49", ifelse(breakdown_group_2 %in% elderly, "50-69", "70+")))),
            breakdown_group_1,
            value) %>%
  group_by(age, breakdown_group_1) %>%
  summarize(total_cost=sum(as.numeric(value)))
## `summarise()` has grouped output by 'age'. You can override using the `.groups`
## argument.
cost_age <- cost2 %>%
  ungroup %>%
  group_by(age)%>%
  summarize(total_age=sum(as.numeric(total_cost)))

cost_gender <- cost2 %>%
  ungroup %>%
  group_by(breakdown_group_1)%>%
  summarize(total_gender=sum(as.numeric(total_cost)))

source <- c(cost_age$age, rep("Cost", times = nrow(cost_gender)))
target <- c(rep("Cost", times = nrow(cost_age)), cost_gender$breakdown_group_1)

df_sanky <- data.frame(
  source = source,
  target = target,
  value = c(cost_age$total_age, cost_gender$total_gender)
)
library(networkD3)
## 
## Attaching package: 'networkD3'
## The following object is masked from 'package:leaflet':
## 
##     JS
nodes <- data.frame(
  name=c(as.character(df_sanky$source), 
  as.character(df_sanky$target)) %>% unique()
)

df_sanky$IDsource <- match(df_sanky$source, nodes$name)-1 
df_sanky$IDtarget <- match(df_sanky$target, nodes$name)-1

node_color <- 'd3.scaleOrdinal() .domain(["0-14", "15-29", "30-49", "50-69", "70+", 
"Cost", "Female", "Male"]) .range(["#235eb8", "#235eb8", "#756f9f" , "#8a8191", "#9c9481", "#aba771", "#b9bb5e", "#c6cf45"])'

p1 <- sankeyNetwork(Links = df_sanky, Nodes = nodes,
              Source = "IDsource", Target = "IDtarget",
              Value = "value", NodeID = "name", 
              colourScale = node_color,
              sinksRight=FALSE) 


p1
#how do costs compare?
cost3 <- cost %>%
  filter(org_name=="ALL SUBMITTERS",
         breakdown!="Total Cost",
         breakdown=="Diagnosis") %>%
  pivot_wider(names_from=summary_type,
              values_from=value) %>%
  janitor::clean_names() %>%
  mutate(cost_per_visit = as.numeric(total_cost)/as.numeric(activity_count)) %>%
  arrange(#desc(activity_count), 
          desc(cost_per_visit))

chosen_diseases <- c("Pregnancy, childbirth and the puerperium", "Cardiac conditions", "Poisoning (inc overdose)", "Diabetes and other endocrinological conditions", "Diseases of the respiratory system", "Bites/stings", "Infectious disease", "Diseases of the digestive system", "Mental and behavioural disorders")

cost3 %>%
  filter(breakdown_group_1 %in% chosen_diseases) %>%
  ggplot(aes(x=fct_reorder(breakdown_group_1, as.numeric(cost_per_visit)), y=cost_per_visit, fill=breakdown_group_1)) +
  geom_bar(stat="identity") +
  coord_flip() +
  theme_nhs() + 
  scale_fill_manual(values=c(palette)) + 
  theme(axis.title.x = element_blank(),
        axis.title.y = element_blank()) +
  labs(title = "Costs per visit vary significantly",
       subtitle="Average cost per visit by category",
       caption="Source: Digital NHS, Acute Patient Level Activity and Costing, 2019-20")

2 Workforce of Healthcare

2.1 Healthcare workforce in the history

nhs_earnings<-read_csv("data/healthcare-spending/NHS Staff Annual Earnings Estimates to June 2022 in NHS Trusts and other core organisations in England, Provisional Statistics CSV text file.csv") %>%
  clean_names()

nhs_earnings<-nhs_earnings%>%
  mutate(year=format(date, format="%Y"))%>%
  mutate(month=format(date, format="%m"))%>%
  filter(!is.na(sample_size))%>%
  mutate(gov_spending=amount*sample_size)%>%
  mutate(year=format(date, format="%Y"))%>%
  mutate(month=format(date, format="%m"))

nhs_earnings_sep<-nhs_earnings%>%
  filter(month=="08")%>%
  filter(payment_type=="PUBGRP_010_BASIC_PAY_PER_FTE")%>%
  filter(staff_group=="All staff")
countperyear<-nhs_earnings%>%
  filter(payment_type=="PUBGRP_010_BASIC_PAY_PER_FTE")%>%
  filter(staff_group=="All staff")


nhs_earnings_sep<-nhs_earnings%>%
  filter(month=="08")%>%
  filter(payment_type=="PUBGRP_010_BASIC_PAY_PER_FTE")%>%
  filter(staff_group=="All staff")



g<-ggplot(countperyear,aes(x=date, y=sample_size/1000000))+geom_line()+
  labs(x="Date",y="Number of Workers (million)",title="Steady increase in NHS workers since 2013",subtitle = "Number of emplyees over time", caption = "NHS Workforce Statistics - July 2022 (NHS, 2022)")+
  geom_point(data=nhs_earnings_sep, aes(x=date, y=sample_size/1000000), colour="#F8766D") + 
  theme_nhs()
g

2.2 Staff Shortages

vacancy <- read.csv("data/vacancy/vacancy_region.csv") %>% 
  clean_names() %>% 
  rename(nhscr16nm = region) %>% 
  mutate(vacancy = as.numeric(str_remove(vacancy, "%")), 
         vacancy_number = as.numeric(str_remove_all(vacancy_number, ",")))
vacancy
##                      nhscr16nm vacancy vacancy_number
## 1                       London    10.9          26002
## 2             South of England     7.3          22969
## 3             North of England     6.3          26079
## 4 Midlands and East of England     8.2          30805
# make sure you have the same direcory stucture to get London wards shapefile
uk_map <- read_sf("data/vacancy/NHS_shape/NHS_Commissioning_Regions_April_2016_Generalised_Clipped_Boundaries_in_England.shp")

uk_map_wgs84 <-  uk_map %>%
  st_transform(4326)
uk_map_wgs84_with_value <- uk_map_wgs84 %>% 
  mutate(nhscr16nm = ifelse(nhscr16nm == "North Of England", "North of England", nhscr16nm)) %>%
  left_join(vacancy, by = "nhscr16nm")
vac_map <- uk_map_wgs84_with_value %>% 
  ggplot(aes()) +
  geom_sf(aes(fill = vacancy, text = paste(nhscr16nm, "\nVacancy rate: ", vacancy, "%", sep = "")), colour = "#ffffff", size = 0.1) + 
  scale_fill_gradientn("%Vacancy", colors = c("#153A70", "#235eb8", "#296ED6", "#3082FC"), 
                       labels = c("10%", "9%", "8%", "7%"), 
                       breaks = c(0.1, 0.09, 0.08, 0.07)) + 
  theme_nhs() + 
  theme(
    legend.position = "right"
  ) + 
  labs(title = "London NHS Vacancy was above 10% \n Source: NHS Digital, 2021/22 Q4 (Mar-22)")
ggplotly(vac_map, tooltip = "text")
physicians <- read_csv('data/vacancy/physicians_per_1000.csv', show_col_types = FALSE)
## New names:
## • `` -> `...67`
physicians_clean <- physicians %>%
  select("Country Name", "2010", "2019") %>%
  pivot_longer(cols = 2:3, names_to = "year", values_to = "physicians_per_1000") %>%
  mutate(year = as.Date(paste(year, 1, 1, sep = "-")),
         year = year(ymd(year))) %>%
  clean_names()
  

p1 <- physicians_clean %>%
  filter(year == 2019) %>%
  drop_na() %>%
  arrange(desc(physicians_per_1000)) %>%
  top_n(10) %>%
  mutate(color = as.factor(ifelse(country_name == "United Kingdom", 1, 0))) %>% 
  ggplot(aes(x = physicians_per_1000, y = fct_reorder(country_name, physicians_per_1000, max, desc = TRUE))) +
  geom_col(aes(fill = color)) + 
  scale_fill_manual(values = two_color) + 
  theme_nhs() + 
  labs(y = NULL, x = 2019) + 
  theme(
    panel.grid.major.x = element_line(color = "grey80"),  
    panel.grid.minor.x = element_line(color = "grey80")
  )

p2 <- physicians_clean %>%
  filter(year == 2010) %>%
  drop_na() %>%
  arrange(desc(physicians_per_1000)) %>%
  top_n(10) %>%
  mutate(color = as.factor(ifelse(country_name == "United Kingdom", 1, 0))) %>% 
  ggplot(aes(x = physicians_per_1000, y = fct_reorder(country_name, physicians_per_1000, max, desc = TRUE))) +
  geom_col(aes(fill = color)) + 
  scale_fill_manual(values = two_color) + 
  theme_nhs() + 
  labs(y = NULL, x = 2010) + 
  theme(
    panel.grid.major.x = element_line(color = "grey80"),  
    panel.grid.minor.x = element_line(color = "grey80")
  )

(p2 + p1) + 
  plot_annotation(
    title = "Physicians per 1k population increased from 2010 to 2019", 
    subtitle = "Year 2010 v.s. Year 2019", 
    caption = "NHS Vacancy Statistics England April 2015 – June 2022 Experimental Statistics"
  ) &
  theme_nhs() 

2.3 How has the NHS salary changed given inflation?

# NHS Staff Earnings Estimates (Source: https://digital.nhs.uk/data-and-information/publications/statistical/nhs-staff-earnings-estimates)
nhs_salary <- read_csv("data/healthcare-spending/NHS Staff Annual Earnings Estimates to June 2022 in NHS Trusts and other core organisations in England, Provisional Statistics CSV text file.csv") %>%
  clean_names()

#Average weekly earnings of people in the UK Whole Economy Level (£): Seasonally Adjusted Total Pay Excluding Arrears (Source:https://www.ons.gov.uk/employmentandlabourmarket/peopleinwork/earningsandworkinghours/timeseries/kab9/emp)

uk_salary <- read_csv("data/healthcare-spending/UK_salary.csv", skip=7)%>%
  clean_names() %>%
  filter(important_notes %in% c("2009", "2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017", "2018", "2019", "2020", "2021")) %>%
  mutate(yearly_salary_uk=x2*52) %>%
  rename(year=important_notes) %>%
  summarize(year, yearly_salary_uk)

nhs_salary2 <- nhs_salary %>%
  filter(payment_type == "PUBGRP_020_EARNINGS",
         staff_group == "All staff") %>%
  mutate(year = year(date)) %>%
  group_by(year) %>%
  summarize(yearly_earnings=sum(amount)) %>%
  filter(year != "2022") %>%
  rename(yearly_earnings_nhs=yearly_earnings)

salary_data <- merge(uk_salary, nhs_salary2, by="year")

salary_data1 <- pivot_longer(salary_data,
             cols=2:3,
             names_to="type",
             values_to="yearly_salary")
salary_data1 %>%
  ggplot(aes(x=year, y=yearly_salary, color=type)) +
  geom_point() + 
  theme_nhs() + 
  theme(legend.position = "none", 
        panel.grid.major = element_line(color = "grey80"),  
        panel.grid.minor = element_line(color = "grey80"),
        plot.title.position = "plot",
        plot.title = ggtext::element_textbox_simple(size=16)) + 
  scale_color_manual(values = c(palette[1], palette[7])) +
  labs(title = "<b> NHS salaries are bad... but maybe it's just a sign of a wider problem?</b><br>
       <span style = 'font-size:12pt'> Average salary (£) over the years in the <span style='color:#235eb8'>NHS</span> compared to the <span style='color:#b9bb5e'>UK average </span></span>",
       y = NULL, x = NULL, 
       caption = "Source: ONS Whole Economy Level (£): Seasonally Adjusted Total Pay, NHS Staff Earning Estimates")

3 Patient Experience

3.1 Data cleaning

# load data
wl <- read.csv("data/waiting_list/nhs_waiting_list_time_series.csv")

wl_ts <-wl %>% 
  pivot_longer(-month, names_to = "indicator", values_to = "value") %>% 
  separate(indicator, c("category", "index"), "_") %>% 
  mutate(category = str_replace_all(category, "\\.", "_"), 
         index = str_replace_all(str_replace_all(index, "\\.", "_"), 
                                 "__", "_")) %>% 
  mutate(index = if_else(str_detect(index, "_$"), 
                         substr(index, 1, nchar(index) - 1), index), 
         category = if_else(str_detect(category, "_$"), 
                            substr(category, 1, nchar(category) - 1), 
                            category)) %>% 
  mutate(index = if_else(str_detect(index, "No__"), 
                         str_replace(index, "No__", "Num_more_"), 
                         str_replace(index, "No", "Num"))) %>% 
  mutate(index = if_else(str_detect(index, "^_"), 
                         str_replace(
                           str_replace(index, "_within", "perc_within"), 
                           "__", "perc_"), 
                         index)) %>% 
  mutate(month = as.Date(paste("01-", substr(month, 4, 6), 
                               "-", substr(month, 1, 2), sep=""), format = "%d-%b-%y")) %>% 
  mutate(category = as.factor(category), 
         index = as.factor(index))
# show data
head(wl_ts)
## # A tibble: 6 × 4
##   month      category                index                     value
##   <date>     <fct>                   <fct>                     <dbl>
## 1 2010-01-01 Incomplete_RTT_pathways Median_wait_weeks           6.5
## 2 2010-01-01 Incomplete_RTT_pathways 92nd_percentile_weeks      20.3
## 3 2010-01-01 Incomplete_RTT_pathways Num_within_18_weeks   2075443  
## 4 2010-01-01 Incomplete_RTT_pathways perc_within_18_weeks        0.9
## 5 2010-01-01 Incomplete_RTT_pathways Num_more_18_weeks      239617  
## 6 2010-01-01 Incomplete_RTT_pathways Num_more_52_weeks       20737
skimr::skim(wl_ts)
Data summary
Name wl_ts
Number of rows 3060
Number of columns 4
_______________________
Column type frequency:
Date 1
factor 2
numeric 1
________________________
Group variables None

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
month 0 1 2010-01-01 2022-09-01 2016-05-01 153

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
category 0 1 FALSE 3 Inc: 1530, Adm: 765, Non: 765
index 0 1 FALSE 12 Med: 459, Num: 459, Num: 459, 95t: 306

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
value 270 0.91 331913.3 749873.8 0 7 182.5 276497.2 4257514 ▇▁▁▁▁

3.2 The Expending Waiting List

Since the pandemic, the number of patients waiting for treatment has been increasing.

3.2.1 Waiting List Expansion

The waiting list’s size has accually doubled compared with that 6 years ago.

data <- wl_ts %>% 
  filter(category == "Incomplete_RTT_pathways" &
         index %in% c("Num_within_18_weeks", "Num_more_18_weeks") & 
           month >= "2012-09-01") %>% 
  group_by(month) %>% 
  summarize(total_wl = sum(value))

p_wl_incomplete_rtt <- data %>% 
  ggplot(aes(x = month, y = total_wl)) + 
  theme_nhs() + 
  theme(panel.grid.major.y = element_line(color = "grey80"), 
        panel.grid.minor.y = element_line(color = "grey80"), 
        axis.text.x = element_text(angle=45, hjust = 1, vjust = 1), 
        axis.ticks = element_line(color = "grey80")) + 
  geom_bar(stat = "identity", fill = two_color[1]) + 
  scale_y_continuous(expand = c(0, 0), 
                     labels = unit_format(unit = "million", scale = 1e-6), 
                     breaks = seq(0, 7e6, 1e6)) + 
  scale_x_date(limits = as.Date(c("2012-09-01", "2022-09-01")), 
               expand = c(0, 0), date_labels = "%b-%y", 
             breaks = date_breaks(width = "6 month")) + 
  labs(x = NULL, y = NULL, 
       title = "NHS waiting list has been increasing since the pandemic", 
       subtitle = "Sep 2012 to Sep 2022", 
       caption = "Source: Consultant-led Referral to Treatment Waiting Times Data 2022-23"
       ) + 
  geom_vline(xintercept=as.numeric(as.Date("2020-03-01")), linetype=2, col = "black") + 
  annotate("text", x = as.Date("2018-09-01"), y = 6.5*1e6, 
           label = "Start of the pandemic", hjust = 0.5, color = "black")
p_wl_incomplete_rtt

3.2.2 Forever Waiting…

data <- wl_ts %>% 
  filter(category == "Incomplete_RTT_pathways" &
         index %in% c("Num_more_18_weeks", "Num_more_52_weeks") & 
           month >= "2012-09-01") %>% 
  group_by(month, index) %>% 
  summarize(total_wl = sum(value)) %>% 
  mutate(label = ifelse(month != as.Date("2022-09-01"), NA_character_, 
                        ifelse(index == "Num_more_18_weeks", "18+ w", "52+ w")))

p_wl_18_52 <- data %>% 
  ggplot(aes(x = month, y = total_wl, fill = as.factor(index))) + 
  theme_nhs() + 
  theme(axis.line.x = element_blank(), 
        axis.text.x = element_text(angle=45, hjust = 1, vjust = 1), 
        axis.ticks = element_line(color = "grey80")) + 
  geom_segment(
    data = tibble(y = seq(0, 3200000, 400000), 
                  x1 = as.Date("2012-09-01"), 
                  x2 = as.Date("2022-09-01")),
    aes(x = x1, xend = x2, y = y, yend = y),
    inherit.aes = FALSE,
    color = "grey80",
    size = .4
  ) +
  geom_segment(
    data = tibble(y = 0, x1 = as.Date("2012-09-01"), x2 = as.Date("2022-09-01")),
    aes(x = x1, xend = x2, y = y, yend = y),
    inherit.aes = FALSE,
    color = "grey40",
    size = .4
  ) + 
  geom_area(stat = "identity", position = "identity", color = two_color[1], alpha = 0.5) + 
  scale_fill_manual(values = c("#7393E6", palette[1])) + 
  scale_y_continuous(expand = c(0, 0), breaks = seq(0, 3200000, 400000), 
                     label = comma) + 
  labs(x = NULL, y = NULL, 
       title = "More patients waiting FOREVER since the pandemic", 
       subtitle = "Number of patients waiting over 18 and 52 weeks, Sep 2012 to Sep 2022", 
       caption = "Source: Consultant-led Referral to Treatment Waiting Times Data 2022-23"
       ) + 
  geom_vline(xintercept=as.numeric(as.Date("2020-03-01")), linetype=2, col = "black") + 
  annotate("text", x = as.Date("2018-09-01"), y = 3000000, 
           label = "Start of the pandemic", hjust = 0.5, color = "black") + 
  scale_x_date(limits = c(as.Date("2012-09-01"), as.Date("2024-03-01")), 
               date_labels = "%y-%b", expand = c(0, 0), 
               breaks = c(as.Date("2012-09-01"), as.Date("2013-03-01"), 
                          as.Date("2013-09-01"), as.Date("2014-03-01"), 
                          as.Date("2014-09-01"), as.Date("2015-03-01"), 
                          as.Date("2015-09-01"), as.Date("2016-03-01"), 
                          as.Date("2016-09-01"), as.Date("2017-03-01"), 
                          as.Date("2017-09-01"), as.Date("2018-03-01"), 
                          as.Date("2018-09-01"), as.Date("2019-03-01"), 
                          as.Date("2019-09-01"), as.Date("2020-03-01"), 
                          as.Date("2020-09-01"), as.Date("2021-03-01"), 
                          as.Date("2021-09-01"), as.Date("2022-03-01"), as.Date("2022-09-01")
                          ))+
  geom_text_repel(
    aes(color = index, label = label),
    xlim = c(as.Date("2022-09-01"), NA),
    family = "Helvetica",
    fontface = "bold",
    size = 4, 
    segment.size = .7,
    segment.alpha = .5,
    segment.linetype = "dotted"
  ) + 
  scale_color_manual(values = c("#7393E6", palette[1]))
p_wl_18_52

3.2.3 %Forever waiting

data <- wl_ts %>% 
  filter((category == "Incomplete_RTT_pathways") &
         (index %in% c("perc_within_18_weeks", "perc_52_weeks")) & 
           (month >= "2012-09-01")) %>% 
  group_by(month, index) %>% 
  summarize(total_wl = sum(value)) %>% 
  mutate(total_wl = ifelse(index == "perc_within_18_weeks", 1 - total_wl, total_wl), 
         index = ifelse(index == "perc_within_18_weeks", "perc_above_18_weeks", "perc_52_weeks")) %>% 
  mutate(label = ifelse(month != as.Date("2022-09-01"), NA_character_, 
                        ifelse(index == "perc_above_18_weeks", "%18+ w", "%52+ w")))

p_wl_18_52_perc <- data %>% 
  ggplot(aes(x = month, y = total_wl, fill = as.factor(index))) + 
  theme_nhs() + 
  theme(axis.line.x = element_blank(), 
        axis.text.x = element_text(angle=45, hjust = 1, vjust = 1), 
        axis.ticks = element_line(color = "grey80")) + 
  geom_segment(
    data = tibble(y = seq(0, 0.6, 0.05), 
                  x1 = as.Date("2012-09-01"), 
                  x2 = as.Date("2022-09-01")),
    aes(x = x1, xend = x2, y = y, yend = y),
    inherit.aes = FALSE,
    color = "grey80",
    size = .4
  ) +
  geom_segment(
    data = tibble(y = 0, x1 = as.Date("2012-09-01"), x2 = as.Date("2022-09-01")),
    aes(x = x1, xend = x2, y = y, yend = y),
    inherit.aes = FALSE,
    color = "grey40",
    size = .4
  ) + 
  geom_area(stat = "identity", position = "identity", color = two_color[1], alpha = 0.5) + 
  scale_fill_manual(values = c(palette[1], "#7393E6")) + 
  scale_y_continuous(labels = scales::percent, 
                     expand = c(0, 0), breaks = seq(0, 0.6, 0.05)) + 
  labs(x = NULL, y = NULL, 
       title = "More patients waiting FOREVER since the pandemic", 
       subtitle = "Percentage of patients waiting over 18 and 52 weeks, Sep 2012 to Sep 2022", 
       caption = "Source: Consultant-led Referral to Treatment Waiting Times Data 2022-23"
       ) + 
  geom_vline(xintercept=as.numeric(as.Date("2020-03-01")), linetype=2, col = "black") + 
  annotate("text", x = as.Date("2018-09-01"), y = 0.57, 
           label = "Start of the pandemic", hjust = 0.5, color = "black") + 
  scale_x_date(limits = c(as.Date("2012-09-01"), as.Date("2024-03-01")), 
               date_labels = "%y-%b", expand = c(0, 0), 
               breaks = c(as.Date("2012-09-01"), as.Date("2013-03-01"), 
                          as.Date("2013-09-01"), as.Date("2014-03-01"), 
                          as.Date("2014-09-01"), as.Date("2015-03-01"), 
                          as.Date("2015-09-01"), as.Date("2016-03-01"), 
                          as.Date("2016-09-01"), as.Date("2017-03-01"), 
                          as.Date("2017-09-01"), as.Date("2018-03-01"), 
                          as.Date("2018-09-01"), as.Date("2019-03-01"), 
                          as.Date("2019-09-01"), as.Date("2020-03-01"), 
                          as.Date("2020-09-01"), as.Date("2021-03-01"), 
                          as.Date("2021-09-01"), as.Date("2022-03-01"), as.Date("2022-09-01")
                          ))+
  geom_text_repel(
    aes(color = index, label = label),
    xlim = c(as.Date("2022-09-01"), NA),
    family = "Helvetica",
    fontface = "bold",
    size = 4, 
    segment.size = .7,
    segment.alpha = .5,
    segment.linetype = "dotted"
  ) + 
  scale_color_manual(values = c(palette[1], "#7393E6"))
p_wl_18_52_perc

3.2.4 Breakdown

bd <- read.csv("data/waiting_list/breakdown.csv")
names(bd) <- c("function_code", "function_name", "month", "total_wl", "total_within_18w", "perc_within_18w", "total_52plusw")
bd_clean <- bd %>% 
  mutate(
    month = as.Date(paste("01-", substr(month, 4, 6), 
                               "-", substr(month, 1, 2), sep=""), format = "%d-%b-%y")
  )
total_22 <- (bd_clean %>% filter(month == as.Date("2022-09-01")) %>% select(function_code, total_wl) %>% rename("Sep-2022" = "total_wl"))
total_19 <- (bd_clean %>% filter(month == as.Date("2019-09-01")) %>% select(function_code, total_wl) %>% rename("Sep-2019" = "total_wl"))
total_2yr <- total_19 %>% 
  left_join(total_22, by = "function_code") %>% 
  left_join(
  bd_clean %>% 
    filter(month == as.Date("2022-09-01")) %>% 
    select(function_code, function_name), by = "function_code"
) %>% 
  select(function_name, `Sep-2019`, `Sep-2022`)
total_2yr_4plot <- total_2yr %>% 
  rowwise() %>% 
  mutate( mean_tot = mean(c(`Sep-2019`,`Sep-2022`) )) %>% 
  arrange(mean_tot) %>% 
  mutate(function_name=factor(function_name, function_name))
 
# Plot
total_2yr_4plot %>% filter(function_name != "Other") %>% 
ggplot() +
  geom_segment( aes(x=function_name, xend=function_name, 
                    y=`Sep-2019`, yend=`Sep-2022`), color="grey20") +
  geom_point( aes(x=function_name, y=`Sep-2019`), color=two_color[1], size=3 ) +
  geom_point( aes(x=function_name, y=`Sep-2022`), color=two_color[2], size=3 ) +
  coord_flip()+
  theme_nhs() +
  theme(
    legend.position = "none",
    panel.grid.major = element_line(color = "grey80")
  ) +
  scale_y_continuous(label = comma) + 
  labs(
    title = "Waiting List Expansion across all services", 
    subtitle = "Sep 2019 v.s. Sep 2022", 
    y = "Total Number of Incomplete Pathways", x = "")